# Title: De-stereotyping Public Performance Evaluation
# Yixin Liu & Chengxin Xu 2022
# International Public Management Journal
# IPMJ Replication file for Study 1

library(ggplot2) # plot
library(pwr) # power
library(ggpubr) # plot
library(effsize) # effect size
library(multiwayvcov) # cluster se
library(lmtest) # cluster se
library(egg) # ggplot(theme_article)
library(cowplot) # ggplot output
library(Rmisc) # plot stat table function
library(stargazer) # latex
library(xtable) # latex
library(broman) # power

####Function####
# figure title box functions
element_textbox <- function(...) {
  el <- element_text(...)
  class(el) <- c("element_textbox", class(el))
  el
}

element_grob.element_textbox <- function(element, ...) {
  text_grob <- NextMethod()
  rect_grob <- element_grob(calc_element("strip.background", theme_bw()))
  
  ggplot2:::absoluteGrob(
    grid::gList(
      element_grob(calc_element("strip.background", theme_bw())),
      text_grob
    ),
    height = grid::grobHeight(text_grob), 
    width = grid::unit(1, "npc")
  )
}

# percentage function
pcentFun <- function(x) {
  res <- na.omit(x)
  100 * (sum(res) / length(res))
}


# Setup working directory
WD = getwd()
setwd(WD)

####Data####
s1<-read.csv("Study1_Liu&Xu_IPMJ.csv")

####Data transformation####
# transform data from individual level to profile level
s1se<-subset(s1,rand!="JE")
s1je<-subset(s1,rand=="JE")
s1se$id<-c(1:647)
s1je$id<-c(1:341)

s1se$perf=s1se$se1
s1se$send=s1se$se2

#####JE####
s1JE<-data.frame("observation" = 1:682,
                 "IndID" = rep(c(1:341),each=2),
                 "Profile" = rep(c(1,2),341),
                 "perf" = NA, "send" = NA,
                 "mode" = "JE",
                 "student" = rep(c("White","Black"),341))

######performance####
###for the 1st profile
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),6])
  perf<-which(s1JE$IndID==i & s1JE$Profile==1)
  s1JE$perf[perf]<-resp
}
###for the 2nd profile
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),7])
  perf<-which(s1JE$IndID==i & s1JE$Profile==2)
  s1JE$perf[perf]<-resp
}

######school choice####
###for the 1st profile
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),8])
  send<-which(s1JE$IndID==i & s1JE$Profile==1)
  s1JE$send[send]<-resp
}
###for the 2nd profile
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),9])
  send<-which(s1JE$IndID==i & s1JE$Profile==2)
  s1JE$send[send]<-resp
}

######covariates####
# gender
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),11])
  sex<-which(s1JE$IndID==i)
  s1JE$sex[sex]<-resp
}
# race
for (i in 1:682){
  resp<-s1je[which(s1je[,26]==i),12]
  race<-which(s1JE$IndID==i)
  s1JE$race[race]<-resp
}
# age
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),13])
  age<-which(s1JE$IndID==i)
  s1JE$age[age]<-resp
}
# state
for (i in 1:682){
  resp<-s1je[which(s1je[,26]==i),14]
  state<-which(s1JE$IndID==i)
  s1JE$state[state]<-resp
}
# parent
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),23])
  parent<-which(s1JE$IndID==i)
  s1JE$parent[parent]<-resp
}
# income
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),16])
  income<-which(s1JE$IndID==i)
  s1JE$income[income]<-resp
}
# education
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),17])
  education<-which(s1JE$IndID==i)
  s1JE$education[education]<-resp
}
# ideology
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),18])
  ideology<-which(s1JE$IndID==i)
  s1JE$ideology[ideology]<-resp
}
# duration
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),1])
  duration<-which(s1JE$IndID==i)
  s1JE$duration[duration]<-resp
}
# atpass
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),24])
  atpass<-which(s1JE$IndID==i)
  s1JE$atpass[atpass]<-resp
}
# mcpass
for (i in 1:682){
  resp<-as.numeric(s1je[which(s1je[,26]==i),25])
  mcpass<-which(s1JE$IndID==i)
  s1JE$mcpass[mcpass]<-resp
}

# reorder observation
s1JE$observation<-s1JE$observation+647
s1JE$IndID<-s1JE$IndID+647

#####SE####
s1SE<-data.frame("observation" = 1:647,
                 "IndID" = 1:647,
                 "Profile" = 1,
                 "perf" = s1se$perf,
                 "send" = s1se$send,
                 "mode" = "SE",
                 "student" = s1se$rand,
                 "sex" = s1se$sex,
                 "race" = s1se$race,
                 "age" = s1se$age,
                 "state" = s1se$state,
                 "parent" = s1se$par,
                 "income" = s1se$income,
                 "education" = s1se$education,
                 "ideology" = s1se$ideology,
                 "duration" = s1se$duration,
                 "atpass" = s1se$atpass,
                 "mcpass" = s1se$mcpass)

#####Combine####
# combine SE JE datasets
SEJE<-rbind(s1SE,s1JE)
SEJE$student[SEJE$student=="SE1"]<-"White"
SEJE$student[SEJE$student=="SE2"]<-"Black"

####Regression####
SEJE$mode <-as.factor(SEJE$mode)
SEJE$mode = relevel(SEJE$mode, ref="SE")
SEJE$student <-as.factor(SEJE$student)
SEJE$student = relevel(SEJE$student, ref="White")

##performance
# no covariate
mod1<-lm(perf~mode+student+mode*student, data = SEJE)
summary(mod1)
vcov_1 <- cluster.vcov(mod1, SEJE$IndID)
v1<-coeftest(mod1, vcov_1)
v1

# with covariate
mod1c<-lm(perf~mode+student+mode*student
          +sex+race+age+state+parent+income+education+ideology, 
          data = SEJE)
summary(mod1c)
vcov_1c <- cluster.vcov(mod1c, SEJE$IndID)
v1c<-coeftest(mod1c, vcov_1c)
v1c

##school choice
# no covariate
mod2<-lm(send~mode+student+mode*student, data = SEJE)
summary(mod2)
vcov_2 <- cluster.vcov(mod2, SEJE$IndID)
v2<-coeftest(mod2, vcov_2)
v2

# with covariate
mod2c<-lm(send~mode+student+mode*student
          +sex+race+age+state+parent+income+education+ideology, 
          data = SEJE)
summary(mod2c)
vcov_2c <- cluster.vcov(mod2c, SEJE$IndID)
v2c<-coeftest(mod2c, vcov_2c)
v2c

####Table 1####
stargazer(mod1,mod1c,mod2,mod2c,
          style = "apsr",type="html",out="Table1.html",
          se=list(v1[,2],v1c[,2],v2[,2],v2c[,2]),
          p=list(v1[,4],v1c[,4],v2[,4],v2c[,4]),
          covariate.labels=c("JExBlack","Mode: JE",
                             "Students' major Race: Black"),
          column.labels=c("Perceived performance",
                          "Perceived performance (covariates)",
                          "School Choice",
                          "School Choice (covariates)"),
          omit = c("sex","race","age","state","parent","income",
                   "education","ideology"),
          dep.var.labels.include = FALSE,
          intercept.bottom=TRUE,
          order = c("modeJE:studentBlack","modeJE","studentBlack"),
          add.lines = list(c("Covariates", 
                             "No","Yes","No","Yes")),
          keep.stat=c("adj.rsq","n"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          align=TRUE,no.space=TRUE)

####Figure 2####
# performance
SEJE.perf.na.omit<-subset(SEJE,!is.na(perf))
df1 <- summarySE(SEJE.perf.na.omit, measurevar="perf", groupvars=c("mode","student"))
df1$mode <- factor(df1$mode, levels = c("SE","JE"))
df1$student<-ifelse(df1$student=="White","White School","Black School")

p1<-ggplot(df1, aes(x=mode, y=perf, fill=student, color=student)) + 
  geom_bar(stat="identity", width = 0.7, position=position_dodge()) +
  geom_errorbar(aes(ymin=perf-ci, ymax=perf+ci), width=.1,
                position=position_dodge(0.7)) +
  labs(x=element_blank(), y=element_blank(), 
       title="DV: Perceived Performance") +
  scale_x_discrete(labels=c("Separate Evaluation","Joint Evaluation"))+
  theme_article()
p1<-p1+coord_cartesian(ylim=c(50,80))
p1<-p1+theme(legend.title=element_blank(),
             legend.text = element_text(size=12),
             axis.title.y=element_text(size=12),
             axis.text=element_text(size=10),
             legend.justification=c(0,1), legend.position=c(0,1),
             strip.background = element_blank(),
             strip.text.x = element_blank(),
             plot.title = element_textbox(
               hjust = 0.5, margin = margin(t = 5, b = 5)))+
  guides(colour = guide_legend(reverse=F), fill = guide_legend(reverse=F))
p1<-p1+scale_color_manual(values=c("black","black"))

p1

# school choice
SEJE.send.na.omit<-subset(SEJE,!is.na(send))
df2 <- summarySE(SEJE.send.na.omit, measurevar="send", groupvars=c("mode","student"))
df2$mode <- factor(df2$mode, levels = c("SE","JE"))
df2$student<-ifelse(df2$student=="White","White School","Black School")

p2<-ggplot(df2, aes(x=mode, y=send, fill=student, color=student)) + 
  geom_bar(stat="identity", width = 0.7, position=position_dodge()) +
  geom_errorbar(aes(ymin=send-ci, ymax=send+ci), width=.1,
                position=position_dodge(0.7)) +
  labs(x=element_blank(), y=element_blank(),
       title="DV: School Choice") +
  scale_x_discrete(labels=c("Separate Evaluation","Joint Evaluation"))+
  theme_article()
p2<-p2+coord_cartesian(ylim=c(50,80))
p2<-p2+theme(legend.title=element_blank(),
             legend.text = element_text(size=12),
             axis.title.y=element_text(size=12),
             axis.text=element_text(size=10),
             legend.justification=c(0,1), legend.position=c(0,1),
             plot.title = element_textbox(
               hjust = 0.5, margin = margin(t = 5, b = 5)))+
  guides(colour = guide_legend(reverse=F), fill = guide_legend(reverse=F))
p2<-p2+scale_color_manual(values=c("black","black"))

p2

f2.plot<-ggarrange(p1,p2+theme(axis.ticks.y = element_blank(),
                               axis.text.y = element_blank()), 
                   ncol = 2, nrow = 1)

f2.plot<-plot_grid(f2.plot)
f2.plot
#ggsave(file = "Figure2.jpg", width = 6, height = 3, dpi = 666)

####Appendix A####
#####A2####
##Characteristics of sample

# gender dummy
s1$female<-ifelse(s1$sex==0,1,0)
# race
s1$white<-ifelse(s1$race=="white",1,0)
s1$black<-ifelse(s1$race=="black",1,0)
s1$his<-ifelse(s1$race=="hispanic",1,0)
s1$asian<-ifelse(s1$race=="asian",1,0)
s1$other<-ifelse(s1$race=="other",1,0)
# age
s1$young<-ifelse(s1$age<=29,1,0)
s1$midage<-ifelse(s1$age>29 & s1$age<=49,1,0)
s1$old<-ifelse(s1$age>49,1,0)
# income
s1$low<-ifelse(s1$income==1,1,0)
s1$med<-ifelse(s1$income>=2 & s1$income<5,1,0)
s1$high<-ifelse(s1$income>=5,1,0)
# education
s1$hed<-ifelse(s1$education>=5,1,0)
# ideology
s1$con<-ifelse(s1$ideology>=4,1,0)
s1$lib<-ifelse(s1$ideology<=2,1,0)
s1$mod<-ifelse(s1$ideology==3,1,0)

# subgroup single and pair
s1.se1<-subset(s1, rand=="SE1")
s1.se2<-subset(s1, rand=="SE2")
s1.je<-subset(s1, rand=="JE")

## ANOVA F test (randomization check)
# gender
female.aov <- aov(female ~ rand, data = s1)
summary(female.aov)
female<-summary(female.aov)[[1]][,5][1]
male.aov <- aov(sex ~ rand, data = s1)
summary(male.aov)
male<-summary(male.aov)[[1]][,5][1]
# race
wht.aov <- aov(white ~ rand, data = s1)
summary(wht.aov)
wht<-summary(wht.aov)[[1]][,5][1]
blk.aov <- aov(black ~ rand, data = s1)
summary(blk.aov)
blk<-summary(blk.aov)[[1]][,5][1]
his.aov <- aov(his ~ rand, data = s1)
summary(his.aov)
his<-summary(his.aov)[[1]][,5][1]
asian.aov <- aov(asian ~ rand, data = s1)
summary(asian.aov)
asian<-summary(asian.aov)[[1]][,5][1]
other.aov <- aov(other ~ rand, data = s1)
summary(other.aov)
other<-summary(other.aov)[[1]][,5][1]
# age
young.aov <- aov(young ~ rand, data = s1)
summary(young.aov)
young<-summary(young.aov)[[1]][,5][1]
mid.aov <- aov(midage ~ rand, data = s1)
summary(mid.aov)
midage<-summary(mid.aov)[[1]][,5][1]
old.aov <- aov(old ~ rand, data = s1)
summary(old.aov)
old<-summary(old.aov)[[1]][,5][1]
# income
low.aov <- aov(low ~ rand, data = s1)
summary(low.aov)
low<-summary(low.aov)[[1]][,5][1]
med.aov <- aov(med ~ rand, data = s1)
summary(med.aov)
med<-summary(med.aov)[[1]][,5][1]
high.aov <- aov(high ~ rand, data = s1)
summary(high.aov)
high<-summary(high.aov)[[1]][,5][1]
# education
hed.aov <- aov(hed ~ rand, data = s1)
summary(hed.aov)
hed<-summary(hed.aov)[[1]][,5][1]
# parent
parent.aov <- aov(par ~ rand, data = s1)
summary(parent.aov)
parent<-summary(parent.aov)[[1]][,5][1]
# ideology
lib.aov <- aov(lib ~ rand, data = s1)
summary(lib.aov)
lib<-summary(lib.aov)[[1]][,5][1]
con.aov <- aov(con ~ rand, data = s1)
summary(con.aov)
con<-summary(con.aov)[[1]][,5][1]
mod.aov <- aov(mod ~ rand, data = s1)
summary(mod.aov)
mod<-summary(mod.aov)[[1]][,5][1]

#####Table A1####
des_stat<-matrix(NA,18,9)
rownames(des_stat)<-c("Female","Male",
                      "White","Black","Hispanic","Asian","Other",
                      "Age: 18-29","Age: 30-49","Age: >= 50",
                      "Income: < $25k",
                      "Income: $25k to $75k",
                      "Income: >= $75k",
                      "College degree",
                      "Conservative","Liberal","Moderate",
                      "Parenthood")
colnames(des_stat)<-c("Frequency","%","Frequency","%",
                      "Frequency","%","Frequency","%",
                      "P-value")

tol.sum<-round(apply(s1[,c(26,11,27:41,23)],
                     2,sum,na.rm=T),digits=0)
se1.sum<-round(apply(s1.se1[,c(26,11,27:41,23)],
                     2,sum,na.rm=T),digits=0)
se2.sum<-round(apply(s1.se2[,c(26,11,27:41,23)],
                     2,sum,na.rm=T),digits=0)
je.sum<-round(apply(s1.je[,c(26,11,27:41,23)],
                    2,sum,na.rm=T),digits=0)

tol.p<-round(apply(s1[,c(26,11,27:41,23)],
                   2,pcentFun),digits=0)
se1.p<-round(apply(s1.se1[,c(26,11,27:41,23)],
                   2,pcentFun),digits=0)
se2.p<-round(apply(s1.se2[,c(26,11,27:41,23)],
                   2,pcentFun),digits=0)
je.p<-round(apply(s1.je[,c(26,11,27:41,23)],
                  2,pcentFun),digits=0)

des_stat[,1]<-tol.sum
des_stat[,3]<-se2.sum
des_stat[,5]<-se1.sum
des_stat[,7]<-je.sum
des_stat[,2]<-tol.p
des_stat[,4]<-se2.p
des_stat[,6]<-se1.p
des_stat[,8]<-je.p
des_stat[,9]<-round(c(female,male,wht,blk,his,asian,other,
                      young,midage,old,low,med,high,hed,
                      con,lib,mod,parent),2)

a1<-xtable(x=des_stat, digits = c(0,0,0,0,0,0,0,0,0,2))
#print.xtable(A1,type="html",file="a1.html")

#####A3####
# manipulation check
SEJEmcat<-subset(SEJE, mcpass==1 & atpass==1)

#####Figure A1####
df1.mcat <- summarySE(SEJEmcat, measurevar="perf", groupvars=c("mode","student"))
df1.mcat$mode <- factor(df1.mcat$mode, levels = c("SE","JE"))
df1.mcat$student<-ifelse(df1.mcat$student=="White","White School","Black School")

p1.mcat<-ggplot(df1.mcat, aes(x=mode, y=perf, fill=student, color=student)) + 
  geom_bar(stat="identity", width = 0.7, position=position_dodge()) +
  geom_errorbar(aes(ymin=perf-ci, ymax=perf+ci), width=.1,
                position=position_dodge(0.7)) +
  labs(x=element_blank(), y=element_blank(), 
       title="DV: Perceived Performance") +
  scale_x_discrete(labels=c("Separate Evaluation","Joint Evaluation"))+
  theme_article()
p1.mcat<-p1.mcat+coord_cartesian(ylim=c(50,80))
p1.mcat<-p1.mcat+theme(legend.title=element_blank(),
                       legend.text = element_text(size=12),
                       axis.title.y=element_text(size=12),
                       axis.text=element_text(size=10),
                       legend.justification=c(0,1), legend.position=c(0,1),
                       strip.background = element_blank(),
                       strip.text.x = element_blank(),
                       plot.title = element_textbox(
                         hjust = 0.5, margin = margin(t = 5, b = 5)))+
  guides(colour = guide_legend(reverse=F), fill = guide_legend(reverse=F))
p1.mcat<-p1.mcat+scale_color_manual(values=c("black","black"))

p1.mcat

df2.mcat <- summarySE(SEJEmcat, measurevar="send", groupvars=c("mode","student"))
df2.mcat$mode <- factor(df2.mcat$mode, levels = c("SE","JE"))
df2.mcat$student<-ifelse(df2.mcat$student=="White","White School","Black School")

p2.mcat<-ggplot(df2.mcat, aes(x=mode, y=send, fill=student, color=student)) + 
  geom_bar(stat="identity", width = 0.7, position=position_dodge()) +
  geom_errorbar(aes(ymin=send-ci, ymax=send+ci), width=.1,
                position=position_dodge(0.7)) +
  labs(x=element_blank(), y=element_blank(),
       title="DV: School Choice") +
  scale_x_discrete(labels=c("Separate Evaluation","Joint Evaluation"))+
  theme_article()
p2.mcat<-p2.mcat+coord_cartesian(ylim=c(50,80))
p2.mcat<-p2.mcat+theme(legend.title=element_blank(),
                       legend.text = element_text(size=12),
                       axis.title.y=element_text(size=12),
                       axis.text=element_text(size=10),
                       legend.justification=c(0,1), legend.position=c(0,1),
                       plot.title = element_textbox(
                         hjust = 0.5, margin = margin(t = 5, b = 5)))+
  guides(colour = guide_legend(reverse=F), fill = guide_legend(reverse=F))
p2.mcat<-p2.mcat+scale_color_manual(values=c("black","black"))

p2.mcat

f1.mcat.plot<-ggarrange(p1.mcat,p2.mcat+theme(axis.ticks.y = element_blank(),
                                              axis.text.y = element_blank()), 
                        ncol = 2, nrow = 1)

f1.mcat.plot<-plot_grid(f1.mcat.plot)
f1.mcat.plot
#ggsave(file = "A1.jpg", width = 6, height = 3, dpi = 666)
